{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "from chdb import session\n",
    "import time\n",
    "import tempfile\n",
    "import os\n",
    "\n",
    "print(\"Connecting to chdb session...\")\n",
    "chs = session.Session()\n",
    "\n",
    "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n",
    "temp_csv.write(\"movieId,embedding\\n\")  # Header\n",
    "\n",
    "# Generate 10,000 rows of test data\n",
    "for i in range(1, 10001):\n",
    "    embedding = [float(i + j * 0.1) for j in range(10)]\n",
    "    embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
    "    temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n",
    "\n",
    "temp_csv.close()\n",
    "csv_path = temp_csv.name\n",
    "\n",
    "# Setup database and table\n",
    "print(\"\\n=== Setup Phase ===\")\n",
    "chs.query(\"CREATE DATABASE IF NOT EXISTS test ENGINE = Atomic\")\n",
    "chs.query(\"USE test\")\n",
    "chs.query('DROP TABLE IF EXISTS embeddings')\n",
    "\n",
    "chs.query(\"\"\"CREATE TABLE embeddings (\n",
    "      movieId UInt32 NOT NULL,\n",
    "      embedding Array(Float32) NOT NULL\n",
    "  ) ENGINE = MergeTree()\n",
    "  ORDER BY movieId\"\"\")\n",
    "\n",
    "# Test 1: INFILE insertion (10k rows)\n",
    "print(\"\\n=== Test 1: INFILE Insertion (10k rows) ===\")\n",
    "start_time = time.time()\n",
    "try:\n",
    "    result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT CSV\")\n",
    "    infile_time = time.time() - start_time\n",
    "    print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n",
    "    \n",
    "    count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n",
    "    print(f\"Records inserted via INFILE: {count}\")\n",
    "    \n",
    "    if count != '0':\n",
    "        print(\"Sample data from INFILE:\")\n",
    "        sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n",
    "        print(sample)\n",
    "        \n",
    "except Exception as e:\n",
    "    print(f\"✗ INFILE insertion failed: {e}\")\n",
    "    infile_time = 0\n",
    "\n",
    "# Test 2: Regular insertion (10 additional rows)\n",
    "print(\"\\n=== Test 2: Regular VALUES Insertion (10 rows) ===\")\n",
    "start_time = time.time()\n",
    "try:\n",
    "    # Insert 10 additional rows with movieId starting from 20001\n",
    "    for i in range(20001, 20011):\n",
    "        embedding = [float(i + j * 0.1) for j in range(10)]\n",
    "        embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
    "        chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n",
    "    \n",
    "    values_time = time.time() - start_time\n",
    "    print(f\"✓ VALUES insertion successful! Time: {values_time:.3f}s\")\n",
    "    \n",
    "except Exception as e:\n",
    "    print(f\"✗ VALUES insertion failed: {e}\")\n",
    "    values_time = 0\n",
    "\n",
    "# Test 3: Verify total count\n",
    "print(\"\\n=== Test 3: Count Verification ===\")\n",
    "try:\n",
    "    total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n",
    "    print(f\"Total records in embeddings table: {total_count}\")\n",
    "    \n",
    "    # Count by range\n",
    "    infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n",
    "    values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n",
    "    \n",
    "    print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n",
    "    print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n",
    "    \n",
    "    # Sample from both ranges\n",
    "    print(\"\\nSample from INFILE data:\")\n",
    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n",
    "    \n",
    "    print(\"Sample from VALUES data:\")\n",
    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY movieId LIMIT 2'))\n",
    "    \n",
    "except Exception as e:\n",
    "    print(f\"Count verification error: {e}\")\n",
    "\n",
    "# Test 4: Direct CSV engine reading\n",
    "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n",
    "try:\n",
    "    print(\"Reading generated CSV file directly using CSV engine:\")\n",
    "    \n",
    "    # Method 1: Using file() function\n",
    "    csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String')\")\n",
    "    print(f\"CSV file rows (via file() function): {csv_count1}\")\n",
    "    \n",
    "    # Method 2: Using CSV table engine directly\n",
    "    print(\"Sample rows from CSV file:\")\n",
    "    csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n",
    "    print(csv_sample)\n",
    "    \n",
    "    print(\"Last few rows from CSV file:\")\n",
    "    csv_tail = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n",
    "    print(csv_tail)\n",
    "    \n",
    "except Exception as e:\n",
    "    print(f\"CSV engine reading error: {e}\")\n",
    "\n",
    "# Cleanup\n",
    "print(\"\\n=== Cleanup ===\")\n",
    "try:\n",
    "    os.unlink(csv_path)\n",
    "    print(\"✓ Temporary CSV file cleaned up\")\n",
    "except Exception as e:\n",
    "    print(f\"Warning: Could not clean up temporary file: {e}\")\n",
    "\n",
    "print(f\"\\n=== Performance Summary ===\")\n",
    "if infile_time > 0:\n",
    "    print(f\"INFILE insertion (10k rows): {infile_time:.3f}s\")\n",
    "if values_time > 0:\n",
    "    print(f\"VALUES insertion (10 rows): {values_time:.3f}s\")\n",
    "    if infile_time > 0:\n",
    "        print(f\"INFILE is {values_time/infile_time*1000:.1f}x faster per 1000 rows\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
